This script integrates the proteomics (TMT) dataset with the transcriptomic (RNAseq) space in order to explore the proteomic signature of the archetypes and compare their proteomic and transcriptomic profiles.
The integration is carried out by aligning the proteomic and the transcriptomic spaces using Procrustes superimposition.
In order to end up with the proteomics profile of the archetypes it makes sense to define the proteomics as the target matrix (X) and then superimpose the GE matrix (Y) to match the proteomics space. The archetypes position can then be imputed in the proteomics space. Then the proteomic profile of the archetypes can be calculated similarly to the way the gene expression profiles were caculated in 1_fit_archetypes.Rmd, by finding proteins whose abundance decreases significantly with distance from archetype position (or with limma).

knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
knitr::opts_chunk$set(cache=TRUE, cache.extra = R.version)
suppressPackageStartupMessages({
  library(ggplot2)
  library(ggfortify)
  library(GGally)
  library(cowplot)
  library(Matrix)
  library(tidyverse)
  library(pins)
  library(grid)
  library(vegan)
})
# A function for predicting new data from procrustes fit object regardless of scaling
# Modified from vegan::predict.procrustes
# y.cs is centroid size of the original matrix (Y)
# y.means is the centroid of the original matrix (Y)
predict.proc <- function (object, newdata, y.cs=NULL, y.mean) {
  Y <- as.matrix(newdata)
  Y <- Y - matrix(y.mean, byrow=TRUE, nr=nrow(Y), nc=length(y.mean))
  if (object$symmetric & !is.null(y.cs)) Y <- Y/y.cs
  Y <- (object$scale * Y) %*% object$rotation
  colnames(Y) <- colnames(newdata)
  Y
}

Input

Loading clean gene expression and archetyppe data from the first step (1_fit_archetypes).
Loading proteomics data from Emory (TMT-labeled MS/MS-based; A & T Wingo).

# key between proteomics ID and project ID (from T&A Wingo)
protkey <- read_csv("data/metadata/proteomics_ROS_MAP_TRAITS_clean.csv",
                    col_types=cols(.default = "c")) %>% 
  dplyr::select(-Batch) 

# Load metadata with archetype classification and distances
arch.meta <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_meta_k3p20_AD.RDS") %>%
  merge(protkey, by="projid", all.x=TRUE, all.y=FALSE) %>%
  arrange(rnaseq_id)

# Load Emory's proteomics data, transpose so that samples are rows, match with rnaseq_id and make it rownames
data.pt <- read_csv("data/proteomics_n391_residual_log2_batchMSsexPMIageStudy.csv") %>%
  column_to_rownames("X1") %>%
  drop_na() %>% # remove proteins with any na values
  t() %>% as.data.frame() %>% 
  rownames_to_column(var="proteomicsid") %>%
  merge(select(arch.meta, rnaseq_id, proteomicsid), by="proteomicsid", all=FALSE) %>%
  arrange(rnaseq_id) %>%
  select(-proteomicsid) %>%
  column_to_rownames("rnaseq_id")
  
# Load gene expression data for all samples and the three archetypes
arch.ge <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_ge_k3p20_AD.RDS")

# Load gene expression PCA for all samples and the three archetypes
arch.pca <- readRDS("analyses/rnaseq/1_fit_archetypes/archetypes_pca_k3p20_AD.RDS")
varpc <- 100 * arch.pca[["all.pca"]]$sdev^2 / sum(arch.pca[["all.pca"]]$sdev^2) %>%
  round(2)
arch.pca <- arch.pca$arch.pca %>%
  as.data.frame() %>%
  rownames_to_column(var="rnaseq_id")

# Load top genes associated with each archetype
topgenes <- readRDS("analyses/rnaseq/2b_topgenes_annotations/topgenes.anno.RDS")

# Filter arch.meta to include only the intersection between ge and pt
meta.pt <- arch.meta %>%
    filter(rnaseq_id %in% rownames(data.pt))

# Load arcfit object
arcfit <- readRDS("analyses/rnaseq/1_fit_archetypes/arcfit_k3.RDS")[["AD"]][["p20"]]

# Load logFC for all genes
LogFC.ge <- readRDS("analyses/rnaseq/1_fit_archetypes/logFC_allres_k3p20_AD.RDS")
  
archnames <- c("Archetype_1", "Archetype_2", "Archetype_3")

Archetype classfication and position; k=3 p=20 AD only

Cge.arch <- arch.pca%>%
  filter( rnaseq_id %in% archnames) %>%
  column_to_rownames("rnaseq_id")

Cge <- arch.pca %>%
  filter(! rnaseq_id %in% archnames)

Cge %>%
  mutate(Archetype = arch.meta$Archetype, Diagnosis = arch.meta$Diagnosis) %>%
  ggplot() + 
      geom_point(aes(x=PC1, y=PC2, color=Archetype, shape=Diagnosis)) +
      geom_point(data=Cge.arch, aes(x=PC1, y=PC2), color="black", size=10, shape="*") +
      geom_label(data=Cge.arch, aes(x=PC1, y=PC2, label=c("1","2","3")), nudge_y = 3, size=3) +
      xlab(paste0("PC1 (", varpc[1], "%)")) +
      ylab(paste0("PC2 (", varpc[2], "%)")) +
      ggtitle("Transcriptomics PCA")

# extract samples and archetypes that are not in the proteomic data ("new data" for the predict function)
Cge.nd <- Cge %>%
  filter(! rnaseq_id %in% rownames(data.pt)) %>%
  column_to_rownames("rnaseq_id")

# extract samples and archetypes that occur in the proteomic data
Cge <- Cge %>%
  filter(rnaseq_id %in% rownames(data.pt)) %>%
  column_to_rownames("rnaseq_id")
PCpt <- prcomp(data.pt)

autoplot(PCpt, 
         data = meta.pt, 
         colour = 'Archetype',
         main="Proteomics PCA")

Cpt <- PCpt$x[,1:20]

Making sure samples are in the same order in both spaces

order(match(rownames(Cpt), rownames(Cge)))
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198

Comparing ordinations

Superimposing two configurations (ordinations in this case) with Procrustes superimposition involves translating them to the origin, scaling them to a common scale, and rotating one matrix (Y) onto the other (X, the target matrix). The roration matrix is found by decomposing the crossproduct of X and Y. The scaling is controlled by two parameters in the vegan::procrustes() function: scale and symmetric.
The code for vegan::procrustes() indicates the following:

symmetric==TRUE: both matrices are scaled by their centroid size (variance*(n-1)); otherwise the two matrices are translated and Y is rotated without any scaling.

scale==TRUE: the rotated Y matrix is scaled by the centroid size of the crossproduct, while X isn’t scaled at all; otherwise, if symmetric==TRUE both matrices end up with centroid size 1, if symmetric==FALSE both retain their original scale.

Using scale=FALSE and symmetric==FALSE (partial PS) is not sensible here because it does not put the spaces in a comparable scale, and the original scale isn’t meaningful here in itself (unlike size when objects like skulls are compared). We want the archetypes proteomic imputations to be comparable to the protemic profiles of the real patients.

Using scale=FALSE and symmetric==TRUE (full PS) is the usual practice in geometric morphometrics and preserve the procrustes distance between the two configurations as both matrices end up scaled to centroid size 1. However, predictions will not be in the original proteomics scale, which is what we want here eventually.

Using scale=TRUE and symmetric==FALSE is the only option for predict.procrustes(). X is in its original scale, Y is scaled by the crossproduct. It should be possible in principle to do it with symmetric PS (as often done in GM), but then it would require another step to translate the archetype imputations to the original proteomics space to get their proteomic profiles.

Using scale=TRUE and symmetric==TRUE is the only option for protest. It is said in the manual that it minimizes residuals compared to scale==FALSE and symmetric==TRUE, but it is probably more sensitive to samples that are outliers in one space but not in the other (dispersing the differences more “equaly” among the non-outliers, dragging everything else towards the outliers).

Eventually, the results are always presented in the coordinate basis of the target, with both matrices translated to the origin. The target matrix may be scaled depending on the above parameters but not rotated.

The plots below show the superimposed ordinations, once when the target matrix is the transcriptomics and once when it’s the proteomics. The arrows point to the target, black circles are the rotated matrix. The violin plots show the distribution of residuals ( sum of squared deviations between the two ordinations) broken down by archetype classes.

There is not much difference between the archetypes in how well their gene expression aligns with their protemics, regardless of which matrix is the target. Most of the times, archetype 1 is slightly less well aligned than the other two, when the target is transcriptomics.

cs.ge <- sqrt(sum(Cge^2)) # centroid size of ge ordination
mean.ge <- colMeans(Cge) # mean of ge ordination
Cge.arch <- as.matrix(Cge.arch)

pars <- expand.grid(c(TRUE,FALSE), c(TRUE,FALSE))[,2:1]
rownames(pars) <- c("TT", "TF", "FT", "FF")
res.ptL <- res.geL <- Carch.ptL <- Carch.geL <- distCl.ptL <- distCl.geL <- list()

# target is protemoics
for (p in rownames(pars)) {
  scale=pars[p,1]
  symmetric=pars[p,2]
  
  res <- vegan::procrustes(Cpt, Cge, scale=scale, symmetric=symmetric)
  # imputed position of archetypes (translated, scaled, and rotated)
  ap <- predict.proc(res, Cge.arch, y.cs=cs.ge, y.mean=mean.ge) 
  # distances between proteomics samples and imputed archetypes
  dist <- as.matrix(dist(rbind(ap, res$X), method="euclidean"))[-c(1:3),1:3] %>%
      as.data.frame()
  
  # classification of proteomic samples to imputed archetypes
  dist$Archetype <- apply(dist, 1, function(x){names(x)[x==min(x)]})
  
  Carch.ptL[[p]] <- ap 
  res.ptL[[p]] <- res
  distCl.ptL[[p]] <- dist

  rm(dist, res, ap)
}

# target is transcriptomics
for (p in rownames(pars)) {
  scale=pars[p,1]
  symmetric=pars[p,2]
  
  res <- vegan::procrustes(Cge, Cpt, scale=scale, symmetric=symmetric)
  # aligned position of archetypes (translated, and scaled if symmetric==TRUE)
  if(symmetric) {ap <- (Cge.arch-mean.ge)/cs.ge} else {ap <- Cge.arch-mean.ge}
  # distances between proteomics samples and aligned archetypes
  dist <- as.matrix(dist(rbind(ap, res$X), method="euclidean"))[-c(1:3),1:3] %>% 
      as.data.frame()
  # classification of proteomic samples to aligned archetypes
  dist$Archetype <- apply(dist, 1, function(x){names(x)[x==min(x)]}) 
  
  Carch.geL[[p]] <- ap 
  res.geL[[p]] <- res
  distCl.geL[[p]] <- dist

  rm(dist, res, ap)
  }

Ordinations

scale=TRUE symmetric=TRUE

res.pt <- res.ptL[["TT"]]
res.ge <- res.geL[["TT"]]

data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.ptL[["TT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
 ggtitle("Target is proteomics")

data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.geL[["TT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is transcriptomics")

scale=TRUE symmetric=FALSE

res.pt <- res.ptL[["TF"]]
res.ge <- res.geL[["TF"]]

data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.ptL[["TF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is proteomics")

data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.geL[["TF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is transcriptomics")

scale=FALSE symmetric=TRUE

res.pt <- res.ptL[["FT"]]
res.ge <- res.geL[["FT"]]

data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.ptL[["FT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is proteomics")

data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.geL[["FT"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is transcriptomics")

scale=FALSE symmetric=FALSE

res.pt <- res.ptL[["FF"]]
res.ge <- res.geL[["FF"]]

data.frame(dim1y=res.pt$Yrot[,1],
dim2y=res.pt$Yrot[,2],dim1x=res.pt$X[,1],
dim2x=res.pt$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.ptL[["FF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is proteomics")

data.frame(dim1y=res.ge$Yrot[,1],
dim2y=res.ge$Yrot[,2],dim1x=res.ge$X[,1],
dim2x=res.ge$X[,2], arch=meta.pt$Archetype) %>%
  ggplot() +
  geom_segment(aes(x=dim1y,y=dim2y,xend=dim1x,yend=dim2x,colour=arch),arrow=arrow(length=unit(0.2,"cm"))) +
  geom_point(aes(x=dim1y, y=dim2y, colour=arch)) +
  geom_point(aes(x=dim1y, y=dim2y), shape=21, colour="black") +
  geom_label(data=Carch.geL[["FF"]], aes(x=PC1, y=PC2, label=c("1","2","3")), size=3) +
  ggtitle("Target is transcriptomics")

Residuals

Sum of squared deviations between patients position in proteomics and transcriptomics after superimposition.

scale=TRUE symmetric=TRUE

res.pt <- res.ptL[["TT"]]
res.ge <- res.geL[["TT"]]

# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.ge)) %>%
    mutate(Target="Transcriptomics"),
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.pt)) %>%
    mutate(Target="Proteomics")
)

# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
  xlab(NULL)

scale=TRUE symmetric=FALSE

res.pt <- res.ptL[["TF"]]
res.ge <- res.geL[["TF"]]

# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.ge)) %>%
    mutate(Target="Transcriptomics"),
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.pt)) %>%
    mutate(Target="Proteomics")
)

# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
  xlab(NULL)

# plot residuals by archetype
ggplot(filter(PTdist, Target=="Proteomics"), aes(x=Archetype, y=PTdist, fill=Target)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
  xlab(NULL)

scale=FALSE symmetric=TRUE

res.pt <- res.ptL[["FT"]]
res.ge <- res.geL[["FT"]]

# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.ge)) %>%
    mutate(Target="Transcriptomics"),
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.pt)) %>%
    mutate(Target="Proteomics")
)

# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
  xlab(NULL)

scale=FALSE symmetric=FALSE

res.pt <- res.ptL[["FF"]]
res.ge <- res.geL[["FF"]]

# residuals between sample's position in proteomics and transcriptomics ordinations
PTdist <- rbind(
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.ge)) %>%
    mutate(Target="Transcriptomics"),
  meta.pt %>%
    select(Archetype, rnaseq_id) %>%
    mutate(PTdist=residuals(res.pt)) %>%
    mutate(Target="Proteomics")
)

# plot residuals by archetype
ggplot(PTdist, aes(x=Archetype, y=PTdist, fill=Target)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.3, position=position_dodge(0.9)) + theme_minimal() +
  xlab(NULL)

Distances

Distance between each patient from each archetypes in the transcriptomic space compared to their distances in the proteomic space after superimposition, shown below when the target is proteomics and when the target is transcriptomics.

scale=TRUE symmetric=TRUE

# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["TT"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["TT"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of ge samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
  select(rnaseq_id, Archetype_1:Archetype_3) %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
  
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is proteomics") +
  facet_wrap("dist_from", scale="free_y")

merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is transcriptomics") +
  facet_wrap("dist_from", scale="free_y")

scale=TRUE symmetric=FALSE

# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["TF"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["TF"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
  select(rnaseq_id, Archetype_1:Archetype_3) %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
  
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is proteomics") +
  facet_wrap("dist_from", scale="free_y")

merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is transcriptomics") +
  facet_wrap("dist_from", scale="free_y")

scale=FALSE symmetric=TRUE

# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["FT"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["FT"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
  select(rnaseq_id, Archetype_1:Archetype_3) %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
  
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is proteomics") +
  facet_wrap("dist_from", scale="free_y")

merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is transcriptomics") +
  facet_wrap("dist_from", scale="free_y")

scale=FALSE symmetric=FALSE

# distance of proteomics samples from archetypes when proteomics is the target matrix
dist_pp <- distCl.ptL[["FF"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of proteomics samples from archetypes when transcriptomics is the target matrix
dist_pg <- distCl.geL[["FF"]]  %>%
  select(-Archetype) %>%
  rownames_to_column("rnaseq_id") %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Proteomics")

# distance of rnaseq samples from archetypes in the transcriptomic space
dist_g <- meta.pt %>%
  select(rnaseq_id, Archetype_1:Archetype_3) %>%
  pivot_longer(cols=-rnaseq_id, names_to = "dist_from", values_to = "Transcriptomics")
  
merge(dist_g, dist_pp, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is proteomics") +
  facet_wrap("dist_from", scale="free_y")

merge(dist_g, dist_pg, all=TRUE, by=c("rnaseq_id", "dist_from")) %>%
  ggplot() +
  geom_point(aes(x=Transcriptomics, y=Proteomics, color=dist_from)) +
  ggtitle("Distances from archetypes when target is transcriptomics") +
  facet_wrap("dist_from", scale="free_y")

Classification

scale=TRUE symmetric=TRUE

table("Proteomics"=distCl.ptL[["TT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
   vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Proteomics")
  
 table("Proteomics"=distCl.geL[["TT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
  vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Transcriptomics")

scale=TRUE symmetric=FALSE

table("Proteomics"=distCl.ptL[["TF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
   vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Proteomics")
  
 table("Proteomics"=distCl.geL[["TF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
   vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Transcriptomics")

scale=FALSE symmetric=TRUE

table("Proteomics"=distCl.ptL[["FT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
  vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Proteomics")
  
 table("Proteomics"=distCl.geL[["FT"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
  vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Transcriptomics")

scale=FALSE symmetric=FALSE

table("Proteomics"=distCl.ptL[["FF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
  vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Proteomics")
## Warning in legend(residuals, gpfun, residuals_type): All residuals are zero.
 table("Proteomics"=distCl.geL[["FF"]]$Archetype, "Transcriptomics"=meta.pt$Archetype) %>% 
   vcd::mosaic(split_vertical=TRUE, shade=TRUE, 
              labeling_args = list(abbreviate_labs = c(Proteomics = TRUE, Transcriptomics = TRUE), rot_labels=c(0,0), tl_labels = FALSE, tl_varnames = TRUE), 
              main="Target is Transcriptomics")

Proteomics differential expression

For each bootstrapped space, reversing the proteomics PCA for the imputed archetypes and finding their centroid. This creates a “population” of 200 samples for each archetype and 200 samples of “control” (the centroid). This then goes into limma to find logFC for each protein and each archetype relative to control, similar to the way logFC was calculated for gene expression data. ## Reconstructing archetype samples

res <- res.ptL[["FT"]] # procrustes results with proteomics as target
BCL <- arcfit$pch_fits$XC # bootstrapped spaces

k = ncol(BCL[[1]]) # number of archetypes
p = nrow(BCL[[1]]) # number of PC's
nb = length(BCL) # number of bootstrapped replicates

# calculate PC scores for the centroid ("control" samples) in each bootstrapped space
Bcontrol <- sapply(BCL, rowMeans)

# Reorganize bootstrapped scores into one matrix including "control" samples (4*nb x p)
BC <-  BCL %>%
  unlist() %>% 
  matrix(nr=p, nc=k*nb) %>% 
  cbind(Bcontrol) %>%
  t()

# impute position of boostrapped archetypes in the proteomics space
BCrot <- predict.proc(res, BC, y.cs=NULL, y.mean=mean.ge) 

# project everything back to original proteomics space and translate to original center
# (reversing the pca procedure)
rotM <- PCpt$rotation # rotation matrix (eigenvectors)
mu <- PCpt$center # means of proteomics data by which pc's were centered
BCpt <- BCrot %*% t(rotM[,1:p]) %>% # rotating
    scale(center = -mu, scale = FALSE) # translating
 
rm(res, BCL, BC, Bcontrol)

Calculating LogFC

types.b <- rep(c(archnames, "Control"), each=nb)
# calculate LogFC using limma
lm.res.pt <- logFC.pt <-list()
for (a in archnames) {
  type <- types.b[types.b %in% c(a, "Control")]
  de.df <-  as.data.frame(t( BCpt[types.b %in% c(a, "Control"),] ))
  fit <- limma::lmFit(de.df, design=model.matrix(~type))
  fit <- limma::eBayes(fit)
  tt <- limma::topTable(fit, number=Inf, coef=2)
  lm.res.pt[[a]] <- tt
  logFC.pt[[a]] <- tt$logFC
  rm(tt,fit, de.df, type)
}

logFC.pt$protein <- colnames(BCpt)
logFC.pt <- as.data.frame(logFC.pt)

# visualize with heatmap
ComplexHeatmap::Heatmap(as.matrix(logFC.pt[,-4]),
                        name="LogFC",
                        show_row_dend = FALSE,
                        show_row_names = FALSE,
                        #row_order = order(logFC.pt[,3])
                        )

Annotations of significant proteins

Gene-Protein associations

Pairwise correlations between top genes and all proteins

There is some aggregation of gene-protein pairwise correlations by archetype.
Archetype_1 = coral; Archetype_2 = green; Archetype_3 = blue

# Correlations between all proteins and all genes, focusing on the top genes
ge.top <- arch.ge[meta.pt$rnaseq_id,topgenes$Ensembl.ID] 
R <- cor(ge.top, data.pt)
rc <- character(nrow(topgenes))
rc <- ifelse(topgenes$Archetype=="Archetype_1", "coral3", 
             ifelse(topgenes$Archetype=="Archetype_2", "green", "blue"))
heatmap(R, RowSideColors = rc)

Overlap of all proteins and all genes

Match gene names with protein names

genes <- LogFC.ge[[1]] %>% select(HGNC.ID) %>% drop_na(HGNC.ID) %>% pull()

gp <- c()
for (gn in genes) {
  gpv <- grep(paste0("^\\d*[;|\\|]*", gn, "[;|\\|]"), colnames(data.pt), value=TRUE)
  if (length(gpv) > 0) {
    gp <- rbind(gp,
              cbind("gene"=gn, "protein"=gpv))
  }  else {
    gp <- rbind(gp,
              cbind("gene"=gn, "protein"=NA_character_))

    }
}

gp <- as.data.frame(gp)

6 of the genes match two proteins each (instead of one)

cbind(gp[duplicated(gp[,"gene"], incomparables = NA_character_),], 
      "protein2"=gp[duplicated(gp[,"gene"], incomparables = NA_character_, fromLast = TRUE),"protein"])
##           gene                           protein                   protein2
## 6473   CSNK2A1 CSNK2A1; CSNK2A1P; CSNK2A3|Q8NEV1                       <NA>
## 6751  RABGAP1L                   RABGAP1L|B7ZAP0             CSNK2A1|P68400
## 7981   POLR2J4                              <NA>            RABGAP1L|Q5R372
## 8395      NACA                       NACA|E9PAV3                NACA|Q13765
## 9820     AKAP7                      AKAP7|Q9P0M2               AKAP7|O43687
## 11456    HLA-A                      HLA-A|P13746 HLA-A; LOC100507703|P01892
## 12843    PDPK1                      PDPK1|Q6A1A2               PDPK1|O15530

Merge protein names with archetypes topgenes

gpp <-topgenes %>%
  select(HGNC.ID, Archetype) %>%
  filter(!is.na(HGNC.ID)) %>% 
  rename(gene=HGNC.ID) %>%
  merge(gp, by="gene", all=TRUE, incomparables=NA, sort=FALSE) %>%
  merge(select(logFC.pt, protein), by="protein", all=TRUE, incomparables=NA, sort=FALSE) %>%
  mutate(Archetype=replace_na(Archetype, "None"))
#%>%
 # mutate("InOut"=if_else(is.na(gene), "Excluded", "Included"))

Overlap between archetype gene sets and proteomic dataset

Number of proteins that are (or are not) included in the gene sets, and number of genes in each set that are included in the proteomic dataset

gpp %>%
  mutate(protein=if_else(is.na(protein), "Out", "In"), 
         gene=if_else(is.na(gene), "Out", "In")) %>% 
 # table() %>% 
  ggplot(mapping=aes(x=gene, fill=protein)) +
    geom_bar(position = "fill") +
    geom_text(aes(label = ..count..), stat = "count", position = "fill", vjust=1.5) +
    facet_wrap(~Archetype)

Correlation between gene expression and protein abundance

gp <- gpp %>% 
  filter(!is.na(protein) & !is.na(gene)) %>%
  merge(select(LogFC.ge[[1]], HGNC.ID, Ensembl.ID), 
        by.x="gene", by.y="HGNC.ID", all.x=TRUE, all.y=FALSE, sort=FALSE, incomparables = NA) %>%
  mutate_all(as.character)

R.gp <- cor(arch.ge[meta.pt$rnaseq_id, gp$Ensembl.ID], data.pt[meta.pt$rnaseq_id, gp$protein]) %>%
  diag() %>%
  as_tibble_col(column_name="ge.pt.cor") %>%
  mutate(gene=gp$gene)

gp <- merge(gp, R.gp, by="gene")

ggplot(filter(gp, Archetype!="None"), aes(x=Archetype, y=ge.pt.cor)) +
  geom_boxplot()

Correlation between gene and protein logFC

lfc.ge <- rbind(cbind(LogFC.ge[[1]], Archetype="Archetype_1"),
                cbind(LogFC.ge[[2]], Archetype="Archetype_2"),
                cbind(LogFC.ge[[3]], Archetype="Archetype_3")) %>%
  filter(!is.na(HGNC.ID)) %>%
  select(gene=HGNC.ID, Archetype, logFC.ge=logFC)
  

lfc.pt <- logFC.pt %>%
    pivot_longer(-protein, names_to = "Archetype", values_to = "logFC.pt") 

gp <- merge(gp, lfc.pt, by=c("protein", "Archetype"), all.x = TRUE, all.y=FALSE) %>%
  merge(lfc.ge, by=c("gene", "Archetype"), all.x = TRUE, all.y=FALSE)

filter(gp, Archetype != "None") %>%
ggplot(aes(x=logFC.ge, y=logFC.pt, color=Archetype)) +
  geom_point() +
  geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'

Cannonical correlations

Pairwise distances between samples are preserved in Procrustes superimposition but not in Canonical Correlations.

cca <- CCorA(Cge, Cpt)

pwd.ge.cc <- as.matrix(dist(cca$Cy))
pwd.pt.cc <- as.matrix(dist(cca$Cx))

pwd.ge <- as.matrix(dist(Cge))
pwd.pt <- as.matrix(dist(Cpt))

pwd.ge.ps <- as.matrix(dist(res.ptL[["FT"]]$Yrot))
pwd.pt.ps <- as.matrix(dist(res.geL[["FT"]]$Yrot))

df <- data.frame(pwd.ge = pwd.ge[upper.tri(pwd.ge)],
                 pwd.pt = pwd.pt[upper.tri(pwd.pt)],
                 pwd.ge.cc = pwd.ge.cc[upper.tri(pwd.ge.cc)],
                 pwd.pt.cc = pwd.pt.cc[upper.tri(pwd.pt.cc)],
                 pwd.ge.ps = pwd.ge.ps[upper.tri(pwd.ge.ps)],
                 pwd.pt.ps = pwd.pt.ps[upper.tri(pwd.pt.ps)])

ggpairs(df)

ggplot(df) +
  geom_point(aes(x=pwd.ge, y=pwd.ge.ps)) +
  ggtitle("Pairwise distances between transcriptomic samples \n before and after Procrustes superimposition") +
  xlab("Original PCA distances") +
  ylab("Superimposed distances")

ggplot(df) +
  geom_point(aes(x=pwd.pt, y=pwd.pt.ps)) +
  ggtitle("Pairwise distances between proteomic samples \n before and after Procrustes superimposition") +
  xlab("Original PCA distances") +
  ylab("Superimposed distances")

ggplot(df) +
  geom_point(aes(x=pwd.ge, y=pwd.ge.cc)) +
  ggtitle("Pairwise distances between transcriptomic samples in PCA vs CCA") +
  xlab("Original PCA distances") +
  ylab("CCA distances")

ggplot(df) +
  geom_point(aes(x=pwd.pt, y=pwd.pt.cc)) +
  ggtitle("Pairwise distances between proteomic samples in PCA vs CCA") +
  xlab("Original PCA distances") +
  ylab("CCA distances")

Output

outpath <- "analyses/rnaseq/6_proteomics_integration/"

Session info

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       macOS Mojave 10.14.6        
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2020-09-22                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package        * version  date       lib source        
##  assertthat       0.2.1    2019-03-21 [1] CRAN (R 3.6.0)
##  backports        1.1.6    2020-04-05 [1] CRAN (R 3.6.2)
##  broom            0.5.5    2020-02-29 [1] CRAN (R 3.6.0)
##  callr            3.4.3    2020-03-28 [1] CRAN (R 3.6.2)
##  cellranger       1.1.0    2016-07-27 [1] CRAN (R 3.6.0)
##  circlize         0.4.8    2019-09-08 [1] CRAN (R 3.6.0)
##  cli              2.0.2    2020-02-28 [1] CRAN (R 3.6.0)
##  clue             0.3-57   2019-02-25 [1] CRAN (R 3.6.0)
##  cluster          2.1.0    2019-06-19 [1] CRAN (R 3.6.1)
##  codetools        0.2-16   2018-12-24 [1] CRAN (R 3.6.1)
##  colorspace       1.4-1    2019-03-18 [1] CRAN (R 3.6.0)
##  ComplexHeatmap   2.2.0    2019-10-29 [1] Bioconductor  
##  cowplot        * 1.0.0    2019-07-11 [1] CRAN (R 3.6.0)
##  crayon           1.3.4    2017-09-16 [1] CRAN (R 3.6.0)
##  DBI              1.1.0    2019-12-15 [1] CRAN (R 3.6.0)
##  dbplyr           1.4.2    2019-06-17 [1] CRAN (R 3.6.0)
##  desc             1.2.0    2018-05-01 [1] CRAN (R 3.6.0)
##  devtools         2.3.0    2020-04-10 [1] CRAN (R 3.6.1)
##  digest           0.6.25   2020-02-23 [1] CRAN (R 3.6.0)
##  dplyr          * 0.8.5    2020-03-07 [1] CRAN (R 3.6.0)
##  ellipsis         0.3.0    2019-09-20 [1] CRAN (R 3.6.0)
##  evaluate         0.14     2019-05-28 [1] CRAN (R 3.6.0)
##  fansi            0.4.1    2020-01-08 [1] CRAN (R 3.6.0)
##  farver           2.0.3    2020-01-16 [1] CRAN (R 3.6.0)
##  forcats        * 0.5.0    2020-03-01 [1] CRAN (R 3.6.0)
##  fs               1.4.1    2020-04-04 [1] CRAN (R 3.6.2)
##  generics         0.0.2    2018-11-29 [1] CRAN (R 3.6.0)
##  GetoptLong       0.1.8    2020-01-08 [1] CRAN (R 3.6.0)
##  GGally         * 1.5.0    2020-03-25 [1] CRAN (R 3.6.0)
##  ggfortify      * 0.4.9    2020-03-11 [1] CRAN (R 3.6.0)
##  ggplot2        * 3.3.0    2020-03-05 [1] CRAN (R 3.6.0)
##  GlobalOptions    0.1.1    2019-09-30 [1] CRAN (R 3.6.0)
##  glue             1.4.0    2020-04-03 [1] CRAN (R 3.6.2)
##  gridExtra        2.3      2017-09-09 [1] CRAN (R 3.6.0)
##  gtable           0.3.0    2019-03-25 [1] CRAN (R 3.6.0)
##  haven            2.2.0    2019-11-08 [1] CRAN (R 3.6.0)
##  hms              0.5.3    2020-01-08 [1] CRAN (R 3.6.0)
##  htmltools        0.4.0    2019-10-04 [1] CRAN (R 3.6.0)
##  httr             1.4.1    2019-08-05 [1] CRAN (R 3.6.0)
##  jsonlite         1.6.1    2020-02-02 [1] CRAN (R 3.6.0)
##  knitr            1.28     2020-02-06 [1] CRAN (R 3.6.0)
##  labeling         0.3      2014-08-23 [1] CRAN (R 3.6.0)
##  lattice        * 0.20-41  2020-04-02 [1] CRAN (R 3.6.2)
##  lifecycle        0.2.0    2020-03-06 [1] CRAN (R 3.6.0)
##  limma            3.42.2   2020-02-03 [1] Bioconductor  
##  lmtest           0.9-37   2019-04-30 [1] CRAN (R 3.6.0)
##  lubridate        1.7.8    2020-04-06 [1] CRAN (R 3.6.2)
##  magrittr         1.5      2014-11-22 [1] CRAN (R 3.6.0)
##  MASS             7.3-51.5 2019-12-20 [1] CRAN (R 3.6.1)
##  Matrix         * 1.2-18   2019-11-27 [1] CRAN (R 3.6.0)
##  memoise          1.1.0    2017-04-21 [1] CRAN (R 3.6.0)
##  mgcv             1.8-31   2019-11-09 [1] CRAN (R 3.6.0)
##  modelr           0.1.6    2020-02-22 [1] CRAN (R 3.6.0)
##  munsell          0.5.0    2018-06-12 [1] CRAN (R 3.6.0)
##  nlme             3.1-145  2020-03-04 [1] CRAN (R 3.6.0)
##  permute        * 0.9-5    2019-03-12 [1] CRAN (R 3.6.0)
##  pillar           1.4.3    2019-12-20 [1] CRAN (R 3.6.1)
##  pins           * 0.4.1    2020-05-28 [1] CRAN (R 3.6.2)
##  pkgbuild         1.0.6    2019-10-09 [1] CRAN (R 3.6.0)
##  pkgconfig        2.0.3    2019-09-22 [1] CRAN (R 3.6.0)
##  pkgload          1.0.2    2018-10-29 [1] CRAN (R 3.6.0)
##  plyr             1.8.6    2020-03-03 [1] CRAN (R 3.6.0)
##  png              0.1-7    2013-12-03 [1] CRAN (R 3.6.0)
##  prettyunits      1.1.1    2020-01-24 [1] CRAN (R 3.6.0)
##  processx         3.4.2    2020-02-09 [1] CRAN (R 3.6.0)
##  ps               1.3.2    2020-02-13 [1] CRAN (R 3.6.0)
##  purrr          * 0.3.3    2019-10-18 [1] CRAN (R 3.6.0)
##  R6               2.4.1    2019-11-12 [1] CRAN (R 3.6.0)
##  RColorBrewer     1.1-2    2014-12-07 [1] CRAN (R 3.6.0)
##  Rcpp             1.0.4.6  2020-04-09 [1] CRAN (R 3.6.1)
##  readr          * 1.3.1    2018-12-21 [1] CRAN (R 3.6.0)
##  readxl           1.3.1    2019-03-13 [1] CRAN (R 3.6.0)
##  remotes          2.1.1    2020-02-15 [1] CRAN (R 3.6.0)
##  reprex           0.3.0    2019-05-16 [1] CRAN (R 3.6.0)
##  reshape          0.8.8    2018-10-23 [1] CRAN (R 3.6.0)
##  rjson            0.2.20   2018-06-08 [1] CRAN (R 3.6.0)
##  rlang            0.4.5    2020-03-01 [1] CRAN (R 3.6.0)
##  rmarkdown        2.1      2020-01-20 [1] CRAN (R 3.6.0)
##  rprojroot        1.3-2    2018-01-03 [1] CRAN (R 3.6.0)
##  rstudioapi       0.11     2020-02-07 [1] CRAN (R 3.6.0)
##  rvest            0.3.5    2019-11-08 [1] CRAN (R 3.6.0)
##  scales           1.1.0    2019-11-18 [1] CRAN (R 3.6.1)
##  sessioninfo      1.1.1    2018-11-05 [1] CRAN (R 3.6.0)
##  shape            1.4.4    2018-02-07 [1] CRAN (R 3.6.0)
##  stringi          1.4.6    2020-02-17 [1] CRAN (R 3.6.0)
##  stringr        * 1.4.0    2019-02-10 [1] CRAN (R 3.6.0)
##  testthat         2.3.2    2020-03-02 [1] CRAN (R 3.6.0)
##  tibble         * 3.0.0    2020-03-30 [1] CRAN (R 3.6.2)
##  tidyr          * 1.0.2    2020-01-24 [1] CRAN (R 3.6.0)
##  tidyselect       1.0.0    2020-01-27 [1] CRAN (R 3.6.0)
##  tidyverse      * 1.3.0    2019-11-21 [1] CRAN (R 3.6.0)
##  usethis          1.6.0    2020-04-09 [1] CRAN (R 3.6.1)
##  vcd              1.4-7    2020-04-02 [1] CRAN (R 3.6.2)
##  vctrs            0.2.4    2020-03-10 [1] CRAN (R 3.6.0)
##  vegan          * 2.5-6    2019-09-01 [1] CRAN (R 3.6.0)
##  withr            2.1.2    2018-03-15 [1] CRAN (R 3.6.0)
##  xfun             0.12     2020-01-13 [1] CRAN (R 3.6.0)
##  xml2             1.3.1    2020-04-09 [1] CRAN (R 3.6.2)
##  yaml             2.2.1    2020-02-01 [1] CRAN (R 3.6.0)
##  zoo              1.8-7    2020-01-10 [1] CRAN (R 3.6.0)
## 
## [1] /Users/habera/Dropbox/ampad_archetypes/renv/library/R-3.6/x86_64-apple-darwin15.6.0
## [2] /private/var/folders/ls/45q5krvs5xd4m27ks7cmhy80g4hpwp/T/Rtmpp6Mgpo/renv-system-library
## [3] /Library/Frameworks/R.framework/Versions/3.6/Resources/library